# =============================================================================
# STATIC BAYESIAN LATENT VARIABLE MODEL
# Bayesian estimation of legislative power-sharing using V-Dem indicators
# Adapted from Reuning, Kenwick, and Farris (2019); code pulled from Political Analysis Dataverse.
# =============================================================================

# NOTE: This model requires high-performance computing resources
# It is too computationally intensive for local execution

# --- PACKAGES ----------------------------------------------------------------
# install.packages("rstan", repos = "http://cran.us.r-project.org")
library(rstan)
# install.packages("loo", repos = "http://cran.us.r-project.org")
library(loo)
# install.packages("coda", repos = "http://cran.us.r-project.org")
library(coda)


# --- CONFIGURATION -----------------------------------------------------------
# Clear workspace
rm(list = ls())

# --- HELPER FUNCTIONS --------------------------------------------------------
## Inverse logit function 
inv.logit <- function(x){1/(1+exp(-x))}

## Number of samples
n.samples <- 1000

# --- DATA PREP --------------------------------------------------------

df <- read.csv("data_lvm.csv")


prepare.data <- function(data, include.extra=FALSE)
{
  if (include.extra) {
    final <- data.frame(cbind(
      unclass(factor(data$v2exdfdshs_ord)),
      unclass(factor(data$v2exdfvths_ord)),
      unclass(factor(data$v2exdfpphs_ord)),
      unclass(factor(data$v2lginvstp_ord)),
      unclass(factor(data$v2lgoppart_ord)), 
      unclass(factor(data$v2lgfunds_ord)),
      unclass(factor(data$v2lgcomslo_ord)), 
      unclass(factor(data$v2lgintbup)),
      unclass(factor(data$v2lgintblo)),
      unclass(factor(data$v2lgqstexp)),
      unclass(factor(data$v2lgbicam))
    ))
    names(final)<-c('v2exdfdshs_ord','v2exdfvths_ord','v2exdfpphs_ord','v2lginvstp_ord',
                    'v2lgoppart_ord','v2lgfunds_ord','v2lgcomslo_ord','v2lgintbup',
                    'v2lgintblo','v2lgqstexp','v2lgbicam')
  } else {
    final <- data.frame(cbind(
      unclass(factor(data$v2exdfdshs_ord)),
      unclass(factor(data$v2exdfvths_ord)),
      unclass(factor(data$v2exdfpphs_ord)),
      unclass(factor(data$v2lginvstp_ord)),
      unclass(factor(data$v2lgoppart_ord)), 
      unclass(factor(data$v2lgfunds_ord)),
      unclass(factor(data$v2lgcomslo_ord)), 
      unclass(factor(data$v2lgintbup)),
      unclass(factor(data$v2lgintblo)),
      unclass(factor(data$v2lgqstexp)),
      unclass(factor(data$v2lgbicam))
    ))
    names(final)<-c('v2exdfdshs_ord','v2exdfvths_ord','v2exdfpphs_ord','v2lginvstp_ord',
                    'v2lgoppart_ord','v2lgfunds_ord','v2lgcomslo_ord','v2lgintbup',
                    'v2lgintblo','v2lgqstexp','v2lgbicam')
  }
  rownames(final) <- paste(1:length(data$country), data$country, data$year)
  return (final)
}

df <- prepare.data(df)

df$count.year <- rownames(df)
for(ii in 1:nrow(df)){
  tmp <- strsplit(df$count.year[ii], " ")[[1]]
  tmp <- tmp[-1]
  year <- as.numeric(tmp[length(tmp)])
  df$year[ii] <- as.numeric(tmp[length(tmp)])
  tmp <- tmp[-length(tmp)]
  count <- paste(tmp, collapse = " ")
  df$count[ii] <- paste(tmp, collapse = " ")
  df$year.count[ii] <- paste(year, count)
}

df$count.year.id <- as.numeric(as.factor(df$year.count))
df <- df[order(df$year.count),]

# --- LEGISLATIVE INDICATORS --------------------------------------------------------

# v2exdfdshs_ord
ind_4_v2exdfdshs <- df[,1]
year_count_ind_4_v2exdfdshs <- df$count.year.id
year_count_ind_4_v2exdfdshs <- year_count_ind_4_v2exdfdshs[!is.na(ind_4_v2exdfdshs)]
ind_4_v2exdfdshs <- ind_4_v2exdfdshs[!is.na(ind_4_v2exdfdshs)]
n_4_v2exdfdshs <- length(ind_4_v2exdfdshs)

# v2exdfvths_ord
ind_5_v2exdfvths <- df[,2]
year_count_ind_5_v2exdfvths <- df$count.year.id
year_count_ind_5_v2exdfvths <- year_count_ind_5_v2exdfvths[!is.na(ind_5_v2exdfvths)]
ind_5_v2exdfvths <- ind_5_v2exdfvths[!is.na(ind_5_v2exdfvths)]
n_5_v2exdfvths <- length(ind_5_v2exdfvths)

# v2exdfpphs_ord
ind_3_v2exdfpphs <- df[,3]
year_count_ind_3_v2exdfpphs <- df$count.year.id
year_count_ind_3_v2exdfpphs <- year_count_ind_3_v2exdfpphs[!is.na(ind_3_v2exdfpphs)]
ind_3_v2exdfpphs <- ind_3_v2exdfpphs[!is.na(ind_3_v2exdfpphs)]
n_3_v2exdfpphs <- length(ind_3_v2exdfpphs)

# v2lginvstp_ord
ind_5_v2lginvstp <- df[,4]
year_count_ind_5_v2lginvstp <- df$count.year.id
year_count_ind_5_v2lginvstp <- year_count_ind_5_v2lginvstp[!is.na(ind_5_v2lginvstp)]
ind_5_v2lginvstp <- ind_5_v2lginvstp[!is.na(ind_5_v2lginvstp)]
n_5_v2lginvstp <- length(ind_5_v2lginvstp)

# v2lgoppart_ord
ind_3_v2lgoppart <- df[,5]
year_count_ind_3_v2lgoppart <- df$count.year.id
year_count_ind_3_v2lgoppart <- year_count_ind_3_v2lgoppart[!is.na(ind_3_v2lgoppart)]
ind_3_v2lgoppart <- ind_3_v2lgoppart[!is.na(ind_3_v2lgoppart)]
n_3_v2lgoppart <- length(ind_3_v2lgoppart)

# v2lgfunds_ord
ind_2_v2lgfunds <- df[,6]
year_count_ind_2_v2lgfunds <- df$count.year.id
year_count_ind_2_v2lgfunds <- year_count_ind_2_v2lgfunds[!is.na(ind_2_v2lgfunds)]
ind_2_v2lgfunds <- ind_2_v2lgfunds[!is.na(ind_2_v2lgfunds)]
n_2_v2lgfunds <- length(ind_2_v2lgfunds)

# v2lgcomslo_ord
ind_4_v2lgcomslo <- df[,7]
year_count_ind_4_v2lgcomslo <- df$count.year.id
year_count_ind_4_v2lgcomslo <- year_count_ind_4_v2lgcomslo[!is.na(ind_4_v2lgcomslo)]
ind_4_v2lgcomslo <- ind_4_v2lgcomslo[!is.na(ind_4_v2lgcomslo)]
n_4_v2lgcomslo <- length(ind_4_v2lgcomslo)

# v2lgintbup
ind_2_v2lgintbup <- df[,8]
year_count_ind_2_v2lgintbup <- df$count.year.id
year_count_ind_2_v2lgintbup <- year_count_ind_2_v2lgintbup[!is.na(ind_2_v2lgintbup)]
ind_2_v2lgintbup <- ind_2_v2lgintbup[!is.na(ind_2_v2lgintbup)]
n_2_v2lgintbup <- length(ind_2_v2lgintbup)

# v2lgintblo
ind_2_v2lgintblo <- df[,9]
year_count_ind_2_v2lgintblo <- df$count.year.id
year_count_ind_2_v2lgintblo <- year_count_ind_2_v2lgintblo[!is.na(ind_2_v2lgintblo)]
ind_2_v2lgintblo <- ind_2_v2lgintblo[!is.na(ind_2_v2lgintblo)]
n_2_v2lgintblo <- length(ind_2_v2lgintblo)

# v2lgqstexp
ind_2_v2lgqstexp <- df[,10]
year_count_ind_2_v2lgqstexp <- df$count.year.id
year_count_ind_2_v2lgqstexp <- year_count_ind_2_v2lgqstexp[!is.na(ind_2_v2lgqstexp)]
ind_2_v2lgqstexp <- ind_2_v2lgqstexp[!is.na(ind_2_v2lgqstexp)]
n_2_v2lgqstexp <- length(ind_2_v2lgqstexp)

# v2lgbicam
ind_3_v2lgbicam <- df[,11]
year_count_ind_3_v2lgbicam <- df$count.year.id
year_count_ind_3_v2lgbicam <- year_count_ind_3_v2lgbicam[!is.na(ind_3_v2lgbicam)]
ind_3_v2lgbicam <- ind_3_v2lgbicam[!is.na(ind_3_v2lgbicam)]
n_3_v2lgbicam <- length(ind_3_v2lgbicam)

# --- ORDER DF --------------------------------------------------------
df <- df[order(df$count.year.id),]

df$prev.year <- NA
for(ii in 1:nrow(df)){
  tmp.c <-df$count[ii]
  tmp.y <-df$year[ii]
  tmp.df <- df[df$count==tmp.c & df$year < tmp.y, ]
  if(nrow(tmp.df)==0){
    df$prev.year[ii] <- 0
  } else {
    tmp.df <- tmp.df[order(tmp.df$year), ]
    df$prev.year[ii] <- tmp.df$count.year.id[nrow(tmp.df)]
  }
}

# --- STAN DATA LIST --------------------------------------------------------
stan.data <- list(
  years_count_ind_4_v2exdfdshs = year_count_ind_4_v2exdfdshs,
  items_4_v2exdfdshs = ind_4_v2exdfdshs,
  n_4_v2exdfdshs = n_4_v2exdfdshs,
  years_count_ind_5_v2exdfvths = year_count_ind_5_v2exdfvths,
  items_5_v2exdfvths = ind_5_v2exdfvths,
  n_5_v2exdfvths = n_5_v2exdfvths,
  years_count_ind_3_v2exdfpphs = year_count_ind_3_v2exdfpphs,
  items_3_v2exdfpphs = ind_3_v2exdfpphs,
  n_3_v2exdfpphs = n_3_v2exdfpphs,
  years_count_ind_5_v2lginvstp = year_count_ind_5_v2lginvstp,
  items_5_v2lginvstp = ind_5_v2lginvstp,
  n_5_v2lginvstp = n_5_v2lginvstp,
  years_count_ind_3_v2lgoppart = year_count_ind_3_v2lgoppart,
  items_3_v2lgoppart = ind_3_v2lgoppart,
  n_3_v2lgoppart = n_3_v2lgoppart,
  years_count_ind_2_v2lgfunds = year_count_ind_2_v2lgfunds,
  items_2_v2lgfunds = ind_2_v2lgfunds,
  n_2_v2lgfunds = n_2_v2lgfunds,
  years_count_ind_4_v2lgcomslo = year_count_ind_4_v2lgcomslo,
  items_4_v2lgcomslo = ind_4_v2lgcomslo,
  n_4_v2lgcomslo = n_4_v2lgcomslo,
  years_count_ind_2_v2lgintbup = year_count_ind_2_v2lgintbup,
  items_2_v2lgintbup = ind_2_v2lgintbup,
  n_2_v2lgintbup = n_2_v2lgintbup,
  years_count_ind_2_v2lgintblo = year_count_ind_2_v2lgintblo,
  items_2_v2lgintblo = ind_2_v2lgintblo,
  n_2_v2lgintblo = n_2_v2lgintblo,
  years_count_ind_2_v2lgqstexp = year_count_ind_2_v2lgqstexp,
  items_2_v2lgqstexp = ind_2_v2lgqstexp,
  n_2_v2lgqstexp = n_2_v2lgqstexp,
  years_count_ind_3_v2lgbicam = year_count_ind_3_v2lgbicam,
  items_3_v2lgbicam = ind_3_v2lgbicam,
  n_3_v2lgbicam = n_3_v2lgbicam,
  n_years_count = max(df$count.year.id)
)

# --- RUN THE MODEL --------------------------------------------------------
set.seed(1)

mod.dyn <- stan(file="static.stan",
                data=stan.data, seed=570175513, thin = 5,
                iter=10000)

# --- EXTRACT ESTIMATES --------------------------------------------------------
out <- extract(mod.dyn)
df <- df[order(df$count.year.id),]
df$static.estimates <- apply(out$thetas, 2, median)
df$static.up <- apply(out$thetas, 2, quantile, 0.975) 
df$static.lo <- apply(out$thetas, 2, quantile, 0.025) 

write.csv(df, "estimates_static.csv")

# --- STAN FIT --------------------------------------------------------
saveRDS(mod.dyn, "stat.rds")

# load
mod.dyn <- readRDS("stat.rds")

# --- CONVERGENCE --------------------------------------------------------
params <- c("innov", "c_n_4_v2exdfdshs", "c_n_5_v2exdfvths", "c_n_3_v2exdfpphs", "c_n_5_v2lginvstp", 
            "c_n_3_v2lgoppart", "c_n_2_v2lgfunds", "c_n_4_v2lgcomslo", "c_n_2_v2lgintbup", "c_n_2_v2lgintblo",
            "c_n_2_v2lgqstexp", "c_n_3_v2lgbicam", paste("beta[", 1:11, "]", sep=""))

params.all <- names(mod.dyn)
params.use <- c(grep("thetas", params.all, value=T), grep(paste(params, collapse="|"), params.all, value=T),
                params[params %in% params.all])

pdf("static_convergence.pdf", height=6, width=6)
stan_rhat(mod.dyn, pars = params.use, 
          fill="springgreen")
dev.off()

# --- CLEAN --------------------------------------------------------
rm(mod.dyn)
sapply(1:5, gc)
